Subsetting States

Approaches to Choosing States to Run at the County Level

  • Idea 1 – see which states have wastewater to data to compare to, which would provide another source of information to compare the trends we see.
    • Problem: most states do not have many counties with a substantial amount of wastewater data, so this comparison is only possible for a limited number of counties.
    • Massachusetts happens to be a state with multiple counties with comprehensive wastewater data.

Idea 2 – select states based on survey sample size relative to total population.

Wastewater Data from Biobot Analytics

biobot_link <- "https://raw.githubusercontent.com/biobotanalytics/covid19-wastewater-data/master/wastewater_by_county.csv"


w_data <- read_csv(biobot_link)%>% 
  filter(sampling_week >= ymd("2021-03-01") &  sampling_week <= ymd("2022-03-01")) %>%
  mutate(fips = factor(fipscode)) %>%
  select(-fipscode)

Counties with the Most Wastewater Data Reports

# counties with the most wastewater data
w_data %>% 
  group_by(name,fips) %>%
  summarize(n=n()) %>%
  arrange(desc(n))

Number of Counties with > 35 Observations by State

w_data %>% 
  group_by(name,fips, state) %>%
  summarize(n=n()) %>%
  arrange(desc(n)) %>%
  filter(n > 35) %>%
  group_by(state) %>%
  summarize(n_counties = n()) %>%
  arrange(desc(n_counties))
####################################################################
# COMPARE WASTEWATER TO CORRECTED CASES
####################################################################

state <- "ma"
priors_versions <- c("v1", "v2", "v3", "v4")


versions <- tibble(
  version = c("v1", "v2", "v3", "v4"),
  vlabel = c("Priors Do Not Vary by County and Date", 
             "Distr. for Beta Centered at Empirical Value",
             "Distr. for P(S_1|untested) and Beta Centered at Empirical Values",
             "Distr. for P(S_1|untested) Centered at Empirical Value")
)


state_corrected_path <-  paste0(here::here(), "/analysis/results/adj_biweekly_county/", state, "/")


################################
# ESTIMATED
################################
dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))

corrected <- map_df(priors_versions, ~readRDS(
        paste0(state_corrected_path, "adj_",
               .x, 
               ".RDS")) %>% 
          mutate(version = .x)) %>%
  left_join(dates)

corrected <- corrected %>%
  left_join(versions)


pal <- c("#74A09F", "#A0748B", 
         "#748BA0", "#A08974",
         "#D49E9F", "#D4B89E", "#AFCFE5")
compare_county_wastewater <- function(county_fips, end_date ="2021-12-15") {
  
  county_name <- w_data %>% 
    filter(fips == county_fips) %>%
    pull(name)
    
  custom_title = paste0("Comparing Wastewater Concentration to Corrected Estimates for ",
                 county_name, "\nFIPS: ", county_fips)
  
  end_date <- ymd(end_date)
  
  w_data_for_county <- w_data %>%
    rename(date = sampling_week) %>%
    filter(fips == county_fips & date <= end_date)
  
  if(nrow(w_data_for_county) == 0) {
    message(paste0("No wastewater data for FIPS: ", 
                   county_fips));
    return()}
  
  begin_date <- min(w_data_for_county$date)
  
  
  adj <- corrected %>%filter(fips == county_fips & date <= end_date) %>% pull(exp_cases_ub) %>% max()
  
  conc_max <- max(w_data_for_county$effective_concentration_rolling_average) 
  
  adj <- conc_max/adj
  
  corrected %>%
    filter(fips == county_fips  & date >= begin_date & date <= end_date) %>%
    ggplot() +
    geom_ribbon(aes(x = date, 
               ymin = exp_cases_lb*adj,
               ymax = exp_cases_ub*adj,
               fill = vlabel),
               alpha = .7) +
    geom_line(data = w_data_for_county, 
              aes(x = date, y =effective_concentration_rolling_average ),
              color = "#DB4048",
              size = 1.1) +
    geom_point(data = w_data_for_county, 
              aes(x = date, y =effective_concentration_rolling_average ),
              color = "#DB4048",
              alpha = .5,
              size = 1.2) +
    facet_wrap(~vlabel) +
    scale_fill_manual(values = pal) +
    theme_bw() +
    theme(
      legend.position = "none",
      plot.title = element_text(face = "bold", size = 16, hjust = .5),
      axis.title = element_text(size = 18),
      strip.text = element_text(size = 14)
    )+
    scale_y_continuous(sec.axis = sec_axis(~./adj,
                                       name = "Corrected Infection Estimates",
                                       labels = comma),
                       labels = comma) +
    labs(y = "Effective Concentration Rolling Average",
         title = custom_title) +
    scale_x_date(date_labels = "%b %Y")
}

# compare_county_wastewater(county_fips = "25023")

CTIS Survey Data Sample Size

fb_symptoms <- readRDS(
  paste0(
    here::here(), 
    "/data/state_level/screeningpos_all_states.RDS"))

state_pop <- readRDS(paste0(here::here(), 
                            "/data/demographic/state_pop.RDS")) %>%
  select(POPESTIMATE2019, NAME) 

state_codes <- read_csv(paste0(here::here(), "/data/demographic/statecodes.csv"))

state_pop <- state_pop %>%
  left_join(state_codes, by = c("NAME" = "state")) %>% 
  filter(!is.na(code)) %>%
  rename_with(.cols = everything(), tolower) %>%
  mutate(code = tolower(code)) %>%
  select(pop = popestimate2019, state = code)


fb_symptoms <- fb_symptoms %>%
  left_join(state_pop, by = c("state"="state")) %>%
  mutate(prop_sampled = sample_size / pop) 
  
fb_sample_size <- fb_symptoms %>%
  pivot_wider(names_from = signal, values_from = value) %>%
  select(state,date, sample_size, prop_sampled) 

fb_sample_size <- fb_sample_size %>% 
  group_by(state) %>%
  mutate(mean_sampled = median(prop_sampled)) %>%
  ungroup() %>%
  mutate(quantile_sampled = ntile(mean_sampled, n = 4)) %>%
  group_by(state) %>%
  # ensure one quantile per state
  mutate(quantile_sampled = max(quantile_sampled)) 
# plot distribution of population sampled by state,
# split into quantiles

fb_sample_size %>%
  mutate(quantile_sampled_fact = factor(
    quantile_sampled, levels = unique(quantile_sampled ))) %>%
  ggplot(aes(x = prop_sampled,
             fill =fct_reorder(quantile_sampled_fact, 
                               quantile_sampled))) +
  geom_density() +
  facet_wrap(~fct_reorder(state, quantile_sampled)) +
  theme_bw() +
  viridis::scale_fill_viridis(discrete = TRUE, 
                              end = .9,
                              direction = -1) +
  labs(fill = "Quantile",
       x = "Proportion of Population Sampled",
       title = "Distribution of the Proportion of Population Sampled by State",
       subtitle = "Divided into 4 Quantiles") +
  theme(plot.title = element_text(face="bold", hjust = .5, size = 20),
          plot.subtitle = element_text(face = "italic",
                                       hjust = .5,
                                       size = 18))